data<-data %>%
group_by(Day) %>%
mutate(Daily_Reach = mean(Reach, na.rm = TRUE))
gg<-ggplot(data,aes(x=Day,y=Daily_Reach,group=sentiment,color=sentiment)) +
geom_line() + geom_point() +
labs(title = "Sentiment Rearch Transition", x = "Day", y = "Daily Reach") +
theme_minimal()
p <- ggplotly(gg)
p
A decreasing trend in the daily reach of tweets with positive sentiment.
lda.model <- LDA(myDTM, 10, method='Gibbs', control=list(seed=2022))
topic_matrix <- terms(lda.model,10)
topic_matrix
## Topic 1 Topic 2 Topic 3 Topic 4 Topic 5
## [1,] "time" "tatum" "brown" "game" "win"
## [2,] "will" "jayson" "jaylen" "tonight" "get"
## [3,] "thank" "mvp" "trade" "bleedgreen" "bleedgreen"
## [4,] "mazzulla" "embiid" "via" "night" "now"
## [5,] "joe" "kevin" "per" "heat" "let"
## [6,] "coach" "top" "star" "unfinishedbusiness" "don"
## [7,] "tell" "giannis" "says" "finals" "big"
## [8,] "efforts" "jimmy" "without" "series" "new"
## [9,] "sources" "butler" "lillard" "business" "need"
## [10,] "years" "joel" "jordan" "miami" "today"
## Topic 6 Topic 7 Topic 8 Topic 9 Topic 10
## [1,] "can" "just" "team" "back" "season"
## [2,] "like" "got" "year" "horford" "points"
## [3,] "good" "love" "one" "smart" "pts"
## [4,] "play" "fans" "best" "williams" "games"
## [5,] "going" "see" "player" "marcus" "first"
## [6,] "said" "know" "two" "left" "last"
## [7,] "great" "even" "still" "white" "reb"
## [8,] "think" "live" "league" "right" "ast"
## [9,] "better" "fan" "next" "brogdon" "point"
## [10,] "defense" "people" "way" "derrick" "career"
Category 1: Star Player
Topic 2: Centers around Jayson Tatum and related MVP title discussion
Topic 3: Jaylen Brown and potential trade rumors
Category 2: Game
Topic 1: Timing of games/events
Topic 6: Strategies/opinions on games
Category 3: Franchise Management
Topic 8: Team Management
Topic 9: Other players
Category 4: Fan Engagement
Topic 4: Celtics slogan
Topic 5: Appeal for wining and achievements
Topic 7: Fans community
Topic 10: Season statistics & Career achievements
## document topic
## 1 1 7
## 2 2 6
## 3 3 1, 5, 7, 10
## 4 4 6
## 5 5 1, 10
## 6 6 6
Generally, tweets that focus on fan and community engagement tend to have a higher average daily reach compared to other tweets, although these differences are not significant.
data_1<-data
data_1$topic <- as.numeric(as.character(data_1$topic))
## Warning: NAs introduced by coercion
data_1$topic <- recode(data_1$topic, `2` = 1, `3` = 1, `1` = 2, `6` = 2, `8` = 3, `9` = 3, `10` = 4, `4` = 4, `5` = 4, `7` = 4, .default = NA_real_)
tt<-ggplot(data_1,aes(x=Day,y=Daily_Reach,group=as.factor(topic),color=as.factor(topic))) +
geom_line() + geom_point() +
labs(title = "Topic Rearch Transition", x = "Day", y = "Daily Reach") +
theme_minimal()
t <- ggplotly(tt)
t
fan<-subset(data_1,topic==4)
other_topic <- subset(data_1, topic != 4)
t.test(fan$sentiment,other_topic$sentiment,paired=FALSE,alternative="greater")
##
## Welch Two Sample t-test
##
## data: fan$sentiment and other_topic$sentiment
## t = 0.23616, df = 11265, p-value = 0.4067
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -0.01732599 Inf
## sample estimates:
## mean of x mean of y
## 0.2146013 0.2116971
t.test(fan$Daily_Reach,other_topic$Daily_Reach,paired=FALSE,alternative="greater")
##
## Welch Two Sample t-test
##
## data: fan$Daily_Reach and other_topic$Daily_Reach
## t = 0.60437, df = 11007, p-value = 0.2728
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -647.1339 Inf
## sample estimates:
## mean of x mean of y
## 24458.26 24082.42
fan_engage<-subset(data_1,topic==4)
ss <- ggplot(fan_engage, aes(x = Day, y = Daily_Reach, group = sentiment, color = as.factor(sentiment))) +
geom_line() +
geom_point() +
labs(title = "Topic & Sentiment Reach Transition", x = "Day", y = "Daily Reach") +
scale_color_manual(values = c("blue", "red", "green"), labels = c("Negative Sentiment", "Neural Sentiment", "Positive Sentiment"), name = "Topic") + theme_minimal()
s <- ggplotly(ss)
s <- s %>% layout(legend = list(title = list(text = 'sentiment'),
x = 1.05, y = 0.5))
s
Based on a visual inspection of the results, we cannot determine which sentiments exhibit a distinctly different daily reach throughout an entire season.
reg_1<-lm(Daily_Reach~Day,positive_sentiment)
reg_2<-lm(Daily_Reach~Day,negative_sentiment)
reg_3<-lm(Daily_Reach~sentiment+Day,fan_engage)
stargazer(reg_1,reg_2,reg_3,type="text",star.cutoffs=c(.05,.01,.001))
##
## =================================================================================================
## Dependent variable:
## -----------------------------------------------------------------------------
## Daily_Reach
## (1) (2) (3)
## -------------------------------------------------------------------------------------------------
## sentiment -818.493
## (672.581)
##
## Day -144.021*** -164.246*** -134.356***
## (5.797) (8.748) (5.501)
##
## Constant 2,823,105.000*** 3,216,784.000*** 2,636,237.000***
## (112,601.000) (169,943.400) (106,932.400)
##
## -------------------------------------------------------------------------------------------------
## Observations 6,020 2,609 5,205
## R2 0.093 0.119 0.103
## Adjusted R2 0.093 0.119 0.103
## Residual Std. Error 33,668.440 (df = 6018) 32,533.770 (df = 2607) 33,255.230 (df = 5202)
## F Statistic 617.201*** (df = 1; 6018) 352.500*** (df = 1; 2607) 299.494*** (df = 2; 5202)
## =================================================================================================
## Note: *p<0.05; **p<0.01; ***p<0.001
plot(positive_sentiment$Daily_Reach~positive_sentiment$Day)
abline(reg_1)

Daily Reach & Positive Sentiment: Each additional day is associated with a decrease of 144.021 units in daily reach, indicating a significant negative trend over time for tweets with positive sentiment.
Daily Reach & Negative Sentiment: Daily reach decreases by 164.246 units with each day, showing a significant negative trend over time for tweets with negative sentiment.
Some Explanations
late-season Performance Decline & Adjustment of Expectations: fans’ expectations of the team diminishes, especially when the team fails to meet those expectations.
Waning Novelty: At the beginning of the season, the new start and new possibilities often stimulate interest and positive sentiment. Over time, this novelty may fade.
Impact of Significant Events: if the team is eliminated early in the playoffs, fans’ sentiments could be negatively impacted by the disappointment in the postseason.
However, we can also apply this second explanation to our competitor, the LA Lakers, even though their results are still significantly better than those of the Celtics.
## [1] 1 0 -1
reg_1la<-lm(Daily_Reach~Day,positive_sentimentla)
reg_2la<-lm(Daily_Reach~Day,negative_sentimentla)
stargazer(reg_1la,reg_2la,type="text",star.cutoffs=c(.05,.01,.001))
##
## ======================================================================
## Dependent variable:
## --------------------------------------------------
## Daily_Reach
## (1) (2)
## ----------------------------------------------------------------------
## Day -39.990*** -32.280***
## (3.659) (5.468)
##
## Constant 784,277.100*** 634,972.200***
## (71,097.650) (106,215.800)
##
## ----------------------------------------------------------------------
## Observations 7,158 2,820
## R2 0.016 0.012
## Adjusted R2 0.016 0.012
## Residual Std. Error 21,511.410 (df = 7156) 20,387.020 (df = 2818)
## F Statistic 119.431*** (df = 1; 7156) 34.856*** (df = 1; 2818)
## ======================================================================
## Note: *p<0.05; **p<0.01; ***p<0.001
For these discussions, the decrease in daily reach occurs at a slightly slower rate.
However, the sentiment score acts as a negative predictor of daily reach within these discussions.
melted_cormat <- melt(cor_matrix)
ggplot(data = melted_cormat, aes(x=Var1, y=Var2)) +
geom_tile(aes(fill=value), color='white') +
scale_fill_gradient2(low='blue', high='red', mid='grey', midpoint=0, limit=c(-1,1), space='Lab', name='Correlation') +
theme_minimal() +
theme(axis.text.x=element_text(angle=45, vjust=1, size=12, hjust=1),
axis.text.y=element_text(size=12)) +
coord_fixed()

data <- data %>%
mutate(Day = as.Date(Day, format="%Y-%m-%d")) %>%
arrange(Day)
\\
\\
ph_1 <- ggplot(data = replypart, aes(x = month_factor, y = Reply, fill = month_factor)) +
geom_boxplot() +
ggtitle("Replies By Months") +
theme_minimal() +
scale_fill_discrete(name = "Month") +
xlab("Month") +
ylab("Replies")
ph_2 <- ggplot(data = likepart, aes(x = month_factor, y = Likes, fill = month_factor)) +
geom_boxplot() +
ggtitle("Like By Months") +
theme_minimal() +
scale_fill_discrete(name = "Month") +
xlab("Month") +
ylab("Likes") +
scale_y_continuous(limits = c(0, 20), oob = scales::squish)
grid.arrange(ph_1, ph_2, ncol = 2)

Replies: The highest number of replies occurred from 2022.02 to 2022.05.
Like: The greatest number of likes were seen from 2022.05 to 2022.07 and in 2022.09.
fan_engage$month <- substr(fan_engage$Day, 1, 7)
Densityplot <- ggplot(fan_engage, aes(x = Daily_Reach)) +
geom_density(aes(fill = month), alpha = 0.4) +
geom_vline(aes(xintercept = mean(Daily_Reach)), linetype = "dashed", color = "red") +
ggtitle("Density Plot for Reach Distribution by Month") +
xlab("Reach Number") +
ylab("Density") +
theme_minimal()
print(Densityplot)

\\
Post All-Star Week to Beginning of Playoff (2024.02-2024.04)
Pre-Playoff Engagement Boost, i.e., to capitalize on the increased engagement before the playoffs to build momentum. Run a series of interactive Twitter campaigns starting from February that encourage replies.
transition_freq <- transitions %>%
group_by(Transition) %>%
summarise(Frequency = n())
edges <- strsplit(as.character(transition_freq$Transition), "->") %>%
lapply(function(x) if (!any(is.na(x))) cbind(x[1], x[2])) %>%
do.call(rbind, .) %>%
na.omit() %>%
as.matrix()
filtered_edges <- edges[transition_freq$Frequency >= 5, ]
network <- graph_from_edgelist(filtered_edges, directed = TRUE)
E(network)$weight <- transition_freq$Frequency[transition_freq$Frequency >= 5]
\\
Fan Engagement Predictor: Discussions about the fan community (topic 7) two months ago significantly predict the popularity of discussions related to appeals for winning and achievements (topic 5), which in turn predict the popularity of discussions about the Celtics slogan, such as “green bloods” (topic 4), two months later.
Other Predictor: 1: Discussions about strategies/opinions on games (topic 6) predict the popularity of conversations about the Celtics slogan (topic 4) two months later.
Other Predictor: 2: Discussions about team management (topic 8) significantly predict the popularity of conversations about the fan community (topic 7) two months later.
\\
\\
\\